This notebook accompanies Naveena Sadasivam and Clayton Aldern’s Grist investigation into Oxbow Calcining’s Port Arthur petcoke facility. Naveena reported and wrote the story; Clay authored this repository.
# Chunk defaults for every figure in the notebook: PNG device at 300 dpi,
# with the figure path set to img/png/ as resolved by here::here().
knitr::opts_chunk$set(
  fig.path = here::here("img/png/"),
  dev = "png",
  dpi = 300
)
library(here)
library(tidyverse)
library(skimr)
library(lubridate)
library(circular)
library(mlr3verse)
library(mlr3learners)
library(mlr3pipelines)
library(iml)
library(ggplot2)
# make sure you're working with the most recent version of {mlr3spatiotempcv} if you're working in the experimental chunk at the end.
# remotes::install_github("mlr-org/mlr3spatiotempcv")
# library(mlr3spatiotempcv)

# Fill colours passed to the windrose plots below (their `fill.col` argument).
PALETTE <- c(
  "#d3c8ff",
  "#ac00e8",
  "#f5515b",
  "#3c3830"
)

# Analysis constants; the trailing comment on each line gives its unit.
BINS <- 36 # bins
INCREMENT_WIND <- 8 # mph
INCREMENT_SO2 <- 5 # ppb
CLASS_MIN <- 60 # instances
AUTOCORR_WINDOW <- 10 # hours
N_KILNS_OFF <- 1 # kilns

# Here’s the dataset we grabbed from that 5,000-page PDF.
# Read the cleaned wind/SO2 CSV, declaring a type for every column up front.
# Date and Sulfur Dioxide are read as text at this stage.
w <- here("dat/clean/csv/windspeedData.csv") %>%
  read_csv(
    col_types = cols(
      Date = col_character(),
      Count = col_double(),
      `Sulfur Dioxide` = col_character(),
      `Wind Speed – Scalar` = col_double(),
      `Wind Direction – Resultant` = col_double(),
      `Peak Wind Gust` = col_double(),
      `Outdoor Temperature (ºF)` = col_double(),
      Grouping = col_character()
    )
  )
## Warning: One or more parsing issues, see `problems()` for details
# Swap the CSV headers for short names. The assignment is positional, so it
# assumes the columns arrive in the order listed here — TODO confirm against
# the file if the export ever changes.
colnames(w) <- c(
  "date",
  "count",
  "so2",
  "speed",
  "dir",
  "peak",
  "temp",
  "grouping"
)

# Parse the timestamp text, accepting both two- and four-digit years.
w$date <- w$date %>%
  parse_date_time(orders = c("%m/%d/%y %H:%M", "%m/%d/%Y %H:%M"))

# The SO2 text can contain a non-ASCII hyphen; replace it with ASCII "-"
# before converting to numeric (presumably it is the minus sign on negative
# readings — verify against the raw file).
w$so2 <- w$so2 %>%
  gsub("‐", "-", .) %>%
  as.numeric()

# Let’s add some flags based on the Grouping variable corresponding to whether
# a kiln is off at a given point in time. We also want to filter the dataset
# to the time period after ‘experiments’ have ended.
# Add one logical flag per kiln state. Each flag is a literal (fixed = TRUE,
# i.e. non-regex) substring match against `grouping`; grepl() already returns
# TRUE/FALSE, so no ifelse() wrapper is needed.
# The separators inside the allOff / K3K5Off patterns are the non-ASCII hyphen
# character copied from the data, not an ASCII "-".
w <- w %>%
  mutate(
    K2Off = grepl("K2Off", grouping, fixed = TRUE),
    K3Off = grepl("K3Off", grouping, fixed = TRUE),
    K4Off = grepl("K4Off", grouping, fixed = TRUE),
    K5Off = grepl("K5Off", grouping, fixed = TRUE),
    # any grouping label that mentions "Off" at all
    anyOff = grepl("Off", grouping, fixed = TRUE),
    # all four kilns off at once
    allOff = grepl("K2Off‐K3Off‐K4Off‐K5Off", grouping, fixed = TRUE),
    # exactly the K2HS / K3Off / K4HS / K5Off label
    K3K5Off = grepl("K2HS‐K3Off‐K4HS‐K5Off", grouping, fixed = TRUE)
  )
# Keep an unfiltered copy in case the original data frame is needed later;
# it's not too big.
stash <- w

# Restrict to the post-'experiments' period.
# `date` is POSIXct, so the cut-off must be POSIXct as well: comparing POSIXct
# with a Date makes R fall back to comparing the raw numbers (seconds vs. days)
# with an "Incompatible methods" warning, which keeps essentially every row.
# The cut-off is midnight UTC, matching parse_date_time()'s default time zone.
w <- w %>% filter(date > as.POSIXct("2018-08-01", tz = "UTC"))

# One subset per kiln state, used for the windrose plots.
wK2Off <- w %>% filter(K2Off)
wK3Off <- w %>% filter(K3Off)
wK4Off <- w %>% filter(K4Off)
wK5Off <- w %>% filter(K5Off)
wAnyOff <- w %>% filter(anyOff)
wAllOn <- w %>% filter(!anyOff)
wAllOff <- w %>% filter(allOff)
wK3K5Off <- w %>% filter(K3K5Off)
# NOTE(review): this keeps rows where K2, K3 or K5 is off; rows where K4 is
# also off are not excluded, despite the name — confirm that is intended.
wNotK4Off <- w %>% filter(K2Off | K3Off | K5Off)

# And let’s take a peek at some windrose plots.
# Draw one windrose for a subset of the monitoring data.
#   df    : data frame with `dir` (wind direction, passed to circular() as
#           degrees with the "geographics" template) and `speed` columns.
#   label : panel title; the subset's row count is appended as ", n = <rows>".
#   ...   : further arguments forwarded unchanged to windrose().
# Returns whatever windrose() returns.
plotKilnWindrose <- function(df, label, ...) {
  windrose(
    circular(df$dir, units = "degrees", template = "geographics"),
    df$speed,
    bins = BINS,
    increment = INCREMENT_WIND,
    ticks = FALSE,
    label.freq = TRUE,
    fill.col = PALETTE,
    main = paste0(label, ", n = ", nrow(df)),
    ...
  )
}

# Eight panels on a 4 x 2 grid.
par(mfrow = c(4, 2))

# NOTE(review): only this first panel sets right = TRUE; the other seven use
# windrose()'s default. Kept as in the original — confirm whether all panels
# should bin speeds the same way.
plotKilnWindrose(wAllOn, "All Kilns On", right = TRUE)
plotKilnWindrose(wAllOff, "All Kilns Off")
plotKilnWindrose(wAnyOff, "Any Kiln(s) Off")
plotKilnWindrose(wK3K5Off, "K3 and K5 Off")
plotKilnWindrose(wK2Off, "K2 Off")
plotKilnWindrose(wK3Off, "K3 Off")
plotKilnWindrose(wK4Off, "K4 Off")
plotKilnWindrose(wK5Off, "K5 Off")